{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 88,
   "id": "c3a1cd87-3884-48b6-9d97-fc876a36725b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "from lightgbm import LGBMRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "e318101c-d586-403a-af2d-2d7882b2bb15",
   "metadata": {},
   "outputs": [],
   "source": [
    "DATA_DIR = \"./dataset\"\n",
    "NWP_SPAN = \"2024-01-01_2025-02-28\"\n",
    "\n",
    "\n",
    "def load_nwp(site, source_id):\n",
    "    \"\"\"Read one NWP weather file for `site` and tag its feature columns.\n",
    "\n",
    "    The first column is renamed to ``time``; every other column gets the\n",
    "    prefix ``NWP_<source_id>_`` so the three weather sources can be merged\n",
    "    into one frame without column-name collisions.\n",
    "    \"\"\"\n",
    "    nwp = pd.read_csv(f\"{DATA_DIR}/{site}_weather_{source_id}_{NWP_SPAN}.csv\")\n",
    "    nwp.columns = [\"time\"] + [f\"NWP_{source_id}_{col}\" for col in nwp.columns[1:]]\n",
    "    return nwp\n",
    "\n",
    "\n",
    "# NOTE(review): the wind-site weather used to be read from the 2_sunny_* files\n",
    "# (copy-paste slip), so the wind model saw the solar site's forecasts. The\n",
    "# 1_windy_weather_* names follow the pattern of the wind power file - confirm\n",
    "# they match the files in ./dataset.\n",
    "windy_power = pd.read_csv(f\"{DATA_DIR}/1_windy_power_2024-01-01_2024-12-31.csv\")\n",
    "windy_weather1 = load_nwp(\"1_windy\", 1)\n",
    "windy_weather2 = load_nwp(\"1_windy\", 2)\n",
    "windy_weather3 = load_nwp(\"1_windy\", 3)\n",
    "\n",
    "suny_power = pd.read_csv(f\"{DATA_DIR}/2_sunny_power_2024-01-01_2024-12-31.csv\")\n",
    "suny_weather1 = load_nwp(\"2_sunny\", 1)\n",
    "suny_weather2 = load_nwp(\"2_sunny\", 2)\n",
    "suny_weather3 = load_nwp(\"2_sunny\", 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "id": "2025b848-4d2e-4da7-bff9-574b99549d04",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the `time` column of every loaded frame into datetimes so the\n",
    "# frames can be merged on it and the `.dt` accessor is available later.\n",
    "frames_with_time = (\n",
    "    windy_power, windy_weather1, windy_weather2, windy_weather3,\n",
    "    suny_power, suny_weather1, suny_weather2, suny_weather3,\n",
    ")\n",
    "for frame in frames_with_time:\n",
    "    frame[\"time\"] = pd.to_datetime(frame[\"time\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "10d5a12d-faea-4795-98e0-10739387498f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of unlabeled rows kept as the forecast window.\n",
    "# NOTE(review): 5664 presumably equals 59 days (2025-01-01..2025-02-28)\n",
    "# x 96 quarter-hour steps - confirm against the submission template.\n",
    "TEST_ROWS = 5664\n",
    "\n",
    "\n",
    "def build_train_test(power, nwp_frames):\n",
    "    \"\"\"Join a site's power series with its NWP frames and split by target.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    power : DataFrame holding at least `time` and `real_power`.\n",
    "    nwp_frames : iterable of weather DataFrames, each with a `time` column.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (merged, train, test) : the full joined frame, the rows whose\n",
    "    `real_power` is present, and the last TEST_ROWS rows whose\n",
    "    `real_power` is missing.\n",
    "    \"\"\"\n",
    "    merged = power\n",
    "    for nwp in nwp_frames:\n",
    "        # A right join keeps every timestamp of the weather frame, even\n",
    "        # where no power reading exists (those rows become the test set).\n",
    "        merged = merged.merge(nwp, on=\"time\", how=\"right\")\n",
    "\n",
    "    stamp = merged[\"time\"].dt\n",
    "    merged = merged.assign(\n",
    "        year=stamp.year,\n",
    "        month=stamp.month,\n",
    "        day=stamp.day,\n",
    "        # `dt.weekday` is an alias of `dt.dayofweek`, so the former\n",
    "        # `dayofweek_name` column was an exact duplicate and is dropped.\n",
    "        dayofweek_num=stamp.dayofweek,\n",
    "    )\n",
    "\n",
    "    has_target = merged[\"real_power\"].notna()\n",
    "    return merged, merged[has_target], merged[~has_target].tail(TEST_ROWS)\n",
    "\n",
    "\n",
    "windy_train_test, windy_train, windy_test = build_train_test(\n",
    "    windy_power, [windy_weather1, windy_weather2, windy_weather3]\n",
    ")\n",
    "\n",
    "# The solar pipeline previously merged suny_weather3 twice and never used\n",
    "# suny_weather2; all three NWP sources are joined now.\n",
    "suny_train_test, suny_train, suny_test = build_train_test(\n",
    "    suny_power, [suny_weather1, suny_weather2, suny_weather3]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "id": "b649bfa1-1e54-45ee-a01f-591a2dc0cd7f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.020535 seconds.\n",
      "You can set `force_row_wise=true` to remove the overhead.\n",
      "And if memory is not enough, you can set `force_col_wise=true`.\n",
      "[LightGBM] [Info] Total Bins 3314\n",
      "[LightGBM] [Info] Number of data points in the train set: 35015, number of used features: 22\n",
      "[LightGBM] [Info] Start training from score 35.312050\n"
     ]
    }
   ],
   "source": [
    "# Fit a default LightGBM regressor on the wind-site rows that have a target,\n",
    "# using every column except the timestamp and the target as features.\n",
    "non_feature_cols = [\"time\", \"real_power\"]\n",
    "windy_model = LGBMRegressor()\n",
    "windy_model.fit(windy_train.drop(columns=non_feature_cols), windy_train[\"real_power\"])\n",
    "windy_pred = windy_model.predict(windy_test.drop(columns=non_feature_cols))\n",
    "\n",
    "# Set the prediction column of the existing output1.csv and write it back.\n",
    "output1 = pd.read_csv(\"output1.csv\")\n",
    "output1[\"predict_power\"] = windy_pred\n",
    "output1.to_csv(\"output1.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "5cc1f858-8721-40ed-bb60-a5fe257255f8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001781 seconds.\n",
      "You can set `force_col_wise=true` to remove the overhead.\n",
      "[LightGBM] [Info] Total Bins 3470\n",
      "[LightGBM] [Info] Number of data points in the train set: 34694, number of used features: 22\n",
      "[LightGBM] [Info] Start training from score 5.022930\n"
     ]
    }
   ],
   "source": [
    "# Fit a default LightGBM regressor on the solar-site rows that have a target,\n",
    "# using every column except the timestamp and the target as features.\n",
    "non_feature_cols = [\"time\", \"real_power\"]\n",
    "suny_model = LGBMRegressor()\n",
    "suny_model.fit(suny_train.drop(columns=non_feature_cols), suny_train[\"real_power\"])\n",
    "suny_pred = suny_model.predict(suny_test.drop(columns=non_feature_cols))\n",
    "\n",
    "# Set the prediction column of the existing output2.csv and write it back.\n",
    "output2 = pd.read_csv(\"output2.csv\")\n",
    "output2[\"predict_power\"] = suny_pred\n",
    "output2.to_csv(\"output2.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "id": "1cb79394-26ce-488f-9dec-bac6328b9f4f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete any archive left by an earlier run; -f keeps this quiet when\n",
    "# output.zip does not exist yet (e.g. on a fresh checkout).\n",
    "!\\rm -f output.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "a8f51c14-8724-4191-95e0-19136afe87eb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  adding: output1.csv (deflated 70%)\n",
      "  adding: output2.csv (deflated 79%)\n"
     ]
    }
   ],
   "source": [
    "# Bundle both prediction files into output.zip.\n",
    "!zip output.zip output1.csv output2.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1dba0230-43dc-43e1-a679-743ac80876ee",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "py3.11",
   "language": "python",
   "name": "py3.11"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
